Assess DEGs between clusters and look at cluster distribtuion per group

load common libraries

suppressPackageStartupMessages({
  library(tidyverse) 
  library(Seurat)
  library(RColorBrewer)
  library(viridis)
  library(cowplot)
  library(ggridges)
  library(data.table)
  library(ggpubr)
  library(ComplexHeatmap)
  library(readxl)
  library(writexl)
})
## Warning in system("timedatectl", intern = TRUE): running command 'timedatectl'
## had status 1

load SO + set paths

path <- "~/ChronicMemoryGithub_Submission/7_PolyclonalRechallenge/data/"
outs <- paste0(path, "/2_GEX_Analysis/outs")

so <- paste0(path, "7B_SeuratObject_Clustered.rds") %>% readRDS()

set plot themes and color palettes

plot.theme.umap <- theme_cowplot() + theme(plot.title = element_text(hjust = 0.5), 
        #panel.border = element_rect(colour = "black", fill=NA, linewidth = 0.5, color = "grey80"),
        axis.text = element_blank(),
        axis.line = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank()) + 
  theme(plot.title = element_text(hjust = 0.5, face = "plain")) 

plot.theme <- theme_cowplot() + theme(plot.title = element_text(hjust = 0.5, face = "plain"))

clusterpal <- c(   
 
  "SLEC_1" = "#014f63",
  "SLEC_2" = "#62939a",
  "Eff_Mem" = "#65c7ca",
  "MPEC" = "#122452",
  
  "Exh_Eff" = "#ef8577",
  "Exh_EM" = "#e62230",
  "Exh_Term" = "#a11822",

  "Prolif_1" = "#f6d13a",    
  "Prolif_2" = "#f26e36",   
  "ISG" = "#e6af7c"
  )

remove genes function

#function to remove TCR genes from VF lists
remove_genes <- function(features, species, tcr=TRUE, ig=TRUE, cell_cycle=TRUE, mito=TRUE, histone=TRUE, ribosome=TRUE) {
    print(paste("Before filtering:", length(features), "features"))

    if (species == "human") {
        if (tcr) {
            features <- features[!grepl("^TR[ABGD][VDJC]", features)]
        }
        if (ig) {
            features <- features[!grepl("^IG[HLK][VDJC]", features)]
        }
        if (cell_cycle) {
            features <- features[!(features %in% unlist(cc.genes))]
        }
        if (mito) {
            features <- features[!grepl("^MT-", features)]
        }
    }
    if (species == "mouse") {
        if (tcr) {
            features <- features[!grepl("^Tr[abgd][vdjc]", features)]
        }
        if (tcr) {
            features <- features[!grepl("^Tcr[abgd][vdjc]", features)]
        }
        if (ig) {
            features <- features[!grepl("^Ig[hlk][vdjc]", features)]
        }
        if (cell_cycle) {
            features <- features[!(features %in% stringr::str_to_title(unlist(cc.genes)))]
        }
        if (mito) {
            features <- features[!grepl("^Mt-", features)]
        }
      
       if (histone) {
            features <- features[!grepl("^Hist", features)]
       }
      
       if (ribosome) {
            features <- features[!grepl("^Rps", features)]
       }
      
      if (ribosome) {
            features <- features[!grepl("^Rpl", features)]
        }
      
    }

    print(paste("After filtering:", length(features), "features"))
    
    return(features)
}

ID prolif clusters

DimPlot(so, label = T)

DimPlot(so, group.by = "Phase") & NoAxes() 

VlnPlot(so, features = c("S.Score", "G2M.Score"), pt.size = 0)

find DEG per cluster

# diferential genes per cluster
cluster.markers <- FindAllMarkers(so, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, assay = "RNA", features = vf.clean)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
cluster.markers$cell_cluster[cluster.markers$cluster == "0"] <- "SLEC_1"  # Called Teff in the manuscript
cluster.markers$cell_cluster[cluster.markers$cluster == "1"] <- "Exh_Eff"
cluster.markers$cell_cluster[cluster.markers$cluster == "2"] <- "Exh_EM" 
cluster.markers$cell_cluster[cluster.markers$cluster == "3"] <- "SLEC_2"  # Called Teff in the manuscript
cluster.markers$cell_cluster[cluster.markers$cluster == "4"] <- "Eff_Mem"
cluster.markers$cell_cluster[cluster.markers$cluster == "5"] <- "Prolif_1"
cluster.markers$cell_cluster[cluster.markers$cluster == "6"] <- "ISG"
cluster.markers$cell_cluster[cluster.markers$cluster == "7"] <- "Exh_Term"
cluster.markers$cell_cluster[cluster.markers$cluster == "8"] <- "Prolif_2"
cluster.markers$cell_cluster[cluster.markers$cluster == "9"] <- "MPEC"

# write_csv(cluster.markers, file = paste0(outs, "/2C_DegPerCluster.csv"))

rename clusters

DimPlot(so, label = T)

so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "0"] <- "SLEC_1"  # Called Teff in the manuscript
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "1"] <- "Exh_Eff"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "2"] <- "Exh_EM"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "3"] <- "SLEC_2"  # Called Teff in the manuscript
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "4"] <- "Eff_Mem"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "5"] <- "Prolif_1"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "6"] <- "ISG"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "7"] <- "Exh_Term"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "8"] <- "Prolif_2"
so@meta.data$cell_cluster[so@meta.data$seurat_clusters == "9"] <- "MPEC"

so$cell_cluster <- factor(so$cell_cluster, levels = names(clusterpal))
Idents(so) <- so$cell_cluster
DimPlot(so,cols = clusterpal)

dim reduction plots - nicer

DimPlot(so, cols = clusterpal) + #+ plot.theme.umap + ggtitle(label = "CD8 T cells") 
  geom_text(data = data.frame(x = 1, y = 2), x = -6, y = -6, label = paste("n =", length(so$barcode), "cells"), size = 4) + 
  plot.theme.umap 

# dataframe for other UMAP based plots
umap.df <- data.frame(so@meta.data,  so@reductions$umap@cell.embeddings) %>% 
  mutate(Group = ifelse(Group == "AcuMem", "Arm:Arm", Group), 
         Group = ifelse(Group == "ChrMem", "Cl13:Arm", Group),
         Group = ifelse(Group == "EndoGP33", "Endo GP33+", Group))

# dim recution plots - split by Group =========================
head(umap.df)
##                      orig.ident nCount_RNA nFeature_RNA clonotype_id
## AAACCTGAGACCACGA-1_1  GP33pos_1       6368         2112  clonotype15
## AAACCTGAGAGTGACC-1_1  GP33pos_1       7321         2452  clonotype35
## AAACCTGAGATGTTAG-1_1  GP33pos_1       6111         2349  clonotype54
## AAACCTGAGCTAAACA-1_1  GP33pos_1       7288         2513   clonotype1
## AAACCTGAGGACATTA-1_1  GP33pos_1       4739         1906 clonotype137
## AAACCTGAGGCATGGT-1_1  GP33pos_1       9352         2762  clonotype14
##                                 barcode frequency   proportion
## AAACCTGAGACCACGA-1_1 AAACCTGAGACCACGA-1        86 0.0141470637
## AAACCTGAGAGTGACC-1_1 AAACCTGAGAGTGACC-1        36 0.0059220266
## AAACCTGAGATGTTAG-1_1 AAACCTGAGATGTTAG-1        22 0.0036190163
## AAACCTGAGCTAAACA-1_1 AAACCTGAGCTAAACA-1       662 0.1088994900
## AAACCTGAGGACATTA-1_1 AAACCTGAGGACATTA-1         5 0.0008225037
## AAACCTGAGGCATGGT-1_1 AAACCTGAGGCATGGT-1        92 0.0151340681
##                                                    cdr3s_aa
## AAACCTGAGACCACGA-1_1 TRB:CASSDVGGNTEVFF;TRA:CAVSVPGTGSNRLTF
## AAACCTGAGAGTGACC-1_1    TRB:CASSLGGARAEQFF;TRA:CAANYGNEKITF
## AAACCTGAGATGTTAG-1_1  TRB:CASSSRGANSDYTF;TRA:CAASYNYGNEKITF
## AAACCTGAGCTAAACA-1_1   TRB:CASSSRDRNTEVFF;TRA:CAVGTQVVGQLTF
## AAACCTGAGGACATTA-1_1    TRB:CASSDHGNTEVFF;TRA:CAVPNYGNEKITF
## AAACCTGAGGCATGGT-1_1      TRB:CASSFSYEQYF;TRA:CATHDTNAYKVIF
##                                                                                                              cdr3s_nt
## AAACCTGAGACCACGA-1_1 TRB:TGTGCCAGCAGTGATGTGGGAGGAAACACAGAAGTCTTCTTT;TRA:TGCGCAGTCAGTGTACCAGGCACTGGGAGTAACAGGCTCACTTTT
## AAACCTGAGAGTGACC-1_1          TRB:TGTGCTAGCAGTTTAGGGGGGGCGAGGGCTGAGCAGTTCTTC;TRA:TGTGCAGCCAACTATGGAAATGAGAAAATAACTTTT
## AAACCTGAGATGTTAG-1_1    TRB:TGTGCTAGCAGTAGCAGGGGCGCAAACTCCGACTACACCTTC;TRA:TGTGCAGCAAGTTATAACTATGGAAATGAGAAAATAACTTTT
## AAACCTGAGCTAAACA-1_1       TRB:TGTGCTAGCAGTAGTAGGGACAGAAACACAGAAGTCTTCTTT;TRA:TGTGCTGTGGGGACACAGGTTGTGGGGCAGCTCACTTTC
## AAACCTGAGGACATTA-1_1          TRB:TGTGCCAGCAGTGATCACGGAAACACAGAAGTCTTCTTT;TRA:TGTGCAGTGCCCAACTATGGAAATGAGAAAATAACTTTT
## AAACCTGAGGCATGGT-1_1                TRB:TGTGCCAGCAGCTTCTCCTATGAACAGTACTTC;TRA:TGTGCTACTCATGACACAAATGCTTACAAAGTCATCTTT
##                      inkt_evidence mait_evidence          TRA_aa
## AAACCTGAGACCACGA-1_1                    TRB:gene CAVSVPGTGSNRLTF
## AAACCTGAGAGTGACC-1_1      TRB:gene                  CAANYGNEKITF
## AAACCTGAGATGTTAG-1_1                              CAASYNYGNEKITF
## AAACCTGAGCTAAACA-1_1                               CAVGTQVVGQLTF
## AAACCTGAGGACATTA-1_1                    TRB:gene   CAVPNYGNEKITF
## AAACCTGAGGCATGGT-1_1                               CATHDTNAYKVIF
##                      TRA_secondary_aa         TRB_aa TRB_secondary_aa
## AAACCTGAGACCACGA-1_1             <NA> CASSDVGGNTEVFF             <NA>
## AAACCTGAGAGTGACC-1_1             <NA> CASSLGGARAEQFF             <NA>
## AAACCTGAGATGTTAG-1_1             <NA> CASSSRGANSDYTF             <NA>
## AAACCTGAGCTAAACA-1_1             <NA> CASSSRDRNTEVFF             <NA>
## AAACCTGAGGACATTA-1_1             <NA>  CASSDHGNTEVFF             <NA>
## AAACCTGAGGCATGGT-1_1             <NA>    CASSFSYEQYF             <NA>
##                                                             TRA_nt
## AAACCTGAGACCACGA-1_1 TGCGCAGTCAGTGTACCAGGCACTGGGAGTAACAGGCTCACTTTT
## AAACCTGAGAGTGACC-1_1          TGTGCAGCCAACTATGGAAATGAGAAAATAACTTTT
## AAACCTGAGATGTTAG-1_1    TGTGCAGCAAGTTATAACTATGGAAATGAGAAAATAACTTTT
## AAACCTGAGCTAAACA-1_1       TGTGCTGTGGGGACACAGGTTGTGGGGCAGCTCACTTTC
## AAACCTGAGGACATTA-1_1       TGTGCAGTGCCCAACTATGGAAATGAGAAAATAACTTTT
## AAACCTGAGGCATGGT-1_1       TGTGCTACTCATGACACAAATGCTTACAAAGTCATCTTT
##                      TRA_secondary_nt
## AAACCTGAGACCACGA-1_1             <NA>
## AAACCTGAGAGTGACC-1_1             <NA>
## AAACCTGAGATGTTAG-1_1             <NA>
## AAACCTGAGCTAAACA-1_1             <NA>
## AAACCTGAGGACATTA-1_1             <NA>
## AAACCTGAGGCATGGT-1_1             <NA>
##                                                          TRB_nt
## AAACCTGAGACCACGA-1_1 TGTGCCAGCAGTGATGTGGGAGGAAACACAGAAGTCTTCTTT
## AAACCTGAGAGTGACC-1_1 TGTGCTAGCAGTTTAGGGGGGGCGAGGGCTGAGCAGTTCTTC
## AAACCTGAGATGTTAG-1_1 TGTGCTAGCAGTAGCAGGGGCGCAAACTCCGACTACACCTTC
## AAACCTGAGCTAAACA-1_1 TGTGCTAGCAGTAGTAGGGACAGAAACACAGAAGTCTTCTTT
## AAACCTGAGGACATTA-1_1    TGTGCCAGCAGTGATCACGGAAACACAGAAGTCTTCTTT
## AAACCTGAGGCATGGT-1_1          TGTGCCAGCAGCTTCTCCTATGAACAGTACTTC
##                      TRB_secondary_nt TRA_v_gene TRA_d_gene TRA_j_gene
## AAACCTGAGACCACGA-1_1             <NA>   TRAV3D-3                TRAJ28
## AAACCTGAGAGTGACC-1_1             <NA>  TRAV14D-1                TRAJ48
## AAACCTGAGATGTTAG-1_1             <NA>   TRAV14-3                TRAJ48
## AAACCTGAGCTAAACA-1_1             <NA>   TRAV9D-3                 TRAJ5
## AAACCTGAGGACATTA-1_1             <NA>    TRAV7-6                TRAJ48
## AAACCTGAGGCATGGT-1_1             <NA>   TRAV8N-2                TRAJ30
##                      TRA_c_gene TRB_v_gene TRB_d_gene TRB_j_gene TRB_c_gene
## AAACCTGAGACCACGA-1_1       TRAC   TRBV13-3               TRBJ1-1      TRBC1
## AAACCTGAGAGTGACC-1_1       TRAC     TRBV29               TRBJ2-1      TRBC2
## AAACCTGAGATGTTAG-1_1       TRAC     TRBV17               TRBJ1-2      TRBC1
## AAACCTGAGCTAAACA-1_1       TRAC     TRBV17               TRBJ1-1      TRBC1
## AAACCTGAGGACATTA-1_1       TRAC   TRBV13-3               TRBJ1-1      TRBC1
## AAACCTGAGGCATGGT-1_1       TRAC     TRBV10               TRBJ2-7      TRBC2
##                      TRA_secondary_v_gene TRA_secondary_d_gene
## AAACCTGAGACCACGA-1_1                 <NA>                 <NA>
## AAACCTGAGAGTGACC-1_1                 <NA>                 <NA>
## AAACCTGAGATGTTAG-1_1                 <NA>                 <NA>
## AAACCTGAGCTAAACA-1_1                 <NA>                 <NA>
## AAACCTGAGGACATTA-1_1                 <NA>                 <NA>
## AAACCTGAGGCATGGT-1_1                 <NA>                 <NA>
##                      TRA_secondary_j_gene TRA_secondary_c_gene
## AAACCTGAGACCACGA-1_1                 <NA>                 <NA>
## AAACCTGAGAGTGACC-1_1                 <NA>                 <NA>
## AAACCTGAGATGTTAG-1_1                 <NA>                 <NA>
## AAACCTGAGCTAAACA-1_1                 <NA>                 <NA>
## AAACCTGAGGACATTA-1_1                 <NA>                 <NA>
## AAACCTGAGGCATGGT-1_1                 <NA>                 <NA>
##                      TRB_secondary_v_gene TRB_secondary_d_gene
## AAACCTGAGACCACGA-1_1                 <NA>                 <NA>
## AAACCTGAGAGTGACC-1_1                 <NA>                 <NA>
## AAACCTGAGATGTTAG-1_1                 <NA>                 <NA>
## AAACCTGAGCTAAACA-1_1                 <NA>                 <NA>
## AAACCTGAGGACATTA-1_1                 <NA>                 <NA>
## AAACCTGAGGCATGGT-1_1                 <NA>                 <NA>
##                      TRB_secondary_j_gene TRB_secondary_c_gene chains
## AAACCTGAGACCACGA-1_1                 <NA>                 <NA>     ab
## AAACCTGAGAGTGACC-1_1                 <NA>                 <NA>     ab
## AAACCTGAGATGTTAG-1_1                 <NA>                 <NA>     ab
## AAACCTGAGCTAAACA-1_1                 <NA>                 <NA>     ab
## AAACCTGAGGACATTA-1_1                 <NA>                 <NA>     ab
## AAACCTGAGGCATGGT-1_1                 <NA>                 <NA>     ab
##                      percent.mt keep multiseq_class      Group      Rep     Tet
## AAACCTGAGACCACGA-1_1  1.4447236 TRUE       ChrMem_1   Cl13:Arm        1 GP33pos
## AAACCTGAGAGTGACC-1_1  1.7483950 TRUE       AcuMem_3    Arm:Arm        3 GP33pos
## AAACCTGAGATGTTAG-1_1  0.9818360 TRUE       ChrMem_4   Cl13:Arm        4 GP33pos
## AAACCTGAGCTAAACA-1_1  1.8798024 TRUE       ChrMem_5   Cl13:Arm        5 GP33pos
## AAACCTGAGGACATTA-1_1  0.6752479 TRUE       AcuMem_2    Arm:Arm        2 GP33pos
## AAACCTGAGGCATGGT-1_1  1.6467066 TRUE       EndoGP33 Endo GP33+ EndoGP33 GP33pos
##                           Tet_Group integrated_snn_res.0.3 seurat_clusters
## AAACCTGAGACCACGA-1_1 GP33pos_ChrMem                      2               1
## AAACCTGAGAGTGACC-1_1 GP33pos_AcuMem                      0               3
## AAACCTGAGATGTTAG-1_1 GP33pos_ChrMem                      1               2
## AAACCTGAGCTAAACA-1_1 GP33pos_ChrMem                      1               2
## AAACCTGAGGACATTA-1_1 GP33pos_AcuMem                      3               3
## AAACCTGAGGCATGGT-1_1      Endo_GP33                      0               3
##                           S.Score   G2M.Score Phase singleR_label
## AAACCTGAGACCACGA-1_1 -0.007121965 -0.13607663    G1       T cells
## AAACCTGAGAGTGACC-1_1  0.007847229 -0.09428068     S       T cells
## AAACCTGAGATGTTAG-1_1 -0.034294115 -0.03055381    G1       T cells
## AAACCTGAGCTAAACA-1_1 -0.023339142 -0.10426318    G1       T cells
## AAACCTGAGGACATTA-1_1 -0.073484390 -0.09070658    G1       T cells
## AAACCTGAGGCATGGT-1_1 -0.037144770 -0.15022796    G1       T cells
##                      integrated_snn_res.0.4 cell_cluster     UMAP_1     UMAP_2
## AAACCTGAGACCACGA-1_1                      1      Exh_Eff -2.2335380 -1.2231304
## AAACCTGAGAGTGACC-1_1                      3       SLEC_2 -1.2279615  3.1403521
## AAACCTGAGATGTTAG-1_1                      2       Exh_EM -0.1569732 -4.9479560
## AAACCTGAGCTAAACA-1_1                      2       Exh_EM -2.3348473 -5.6089519
## AAACCTGAGGACATTA-1_1                      3       SLEC_2  0.4887471  0.8196098
## AAACCTGAGGCATGGT-1_1                      3       SLEC_2 -0.8211082  2.6560866
# vector of groups
var.list <- c("Arm:Arm" ,"Cl13:Arm", "Endo GP33+") 

plots <- lapply(var.list, function(x) {
  subset <- umap.df %>% subset(Group == x)
  umap <- ggplot(umap.df, aes(x = UMAP_1, y = UMAP_2)) +
    ggrastr::geom_point_rast(color = "grey90", size = 0.1, raster.dpi = 600) +
    ggrastr::geom_point_rast(data = subset, size = 0.1, aes(color = cell_cluster), raster.dpi = 600) + 
    ggtitle(x) + 
    scale_color_manual(values = clusterpal)  + plot.theme.umap + NoLegend() 
  
   bar <- subset %>% dplyr::count(cell_cluster, Group) %>% mutate(n = n/sum(n)) %>%
    ggplot(aes(y = n, fill = cell_cluster, x = Group)) + geom_col(show.legend = F) + 
    plot.theme + 
    labs(x = element_blank(), y = "Fraction of Cells") + 
    theme(axis.text.y = element_blank(), axis.ticks.y = element_blank()) +
    scale_y_continuous(expand = expansion(mult = c(0.02, 0.02)), breaks=c(0, 0.5, 1)) + 
    scale_fill_manual(values = clusterpal) + coord_flip()
  
   plot_grid(umap, bar, rel_heights = c(1, 0.3), ncol = 1) %>% return()

})

do.call(plot_grid, c(plots, ncol = 2))

# plot out frequencies for all samples
umap.df %>% count(Group, Tet, Rep, cell_cluster) %>% 
  mutate(Tet = ifelse(Tet == "GP33pos", "GP33+", "GP33-")) %>%
  mutate(Mouse = paste(Group, Tet, Rep, sep = "_")) %>% 
  mutate(Mouse = ifelse(Mouse == "Endo GP33+_GP33+_EndoGP33", "Endo GP33+", Mouse)) %>%
  group_by(Mouse) %>% mutate(n = n/sum(n)) %>% 
  ggplot(aes(x = Mouse, y = 100*n, fill = cell_cluster)) + 
  geom_col(show.legend = F) + 
  plot.theme + #rotate_x_text(90) +
  scale_fill_manual(values = clusterpal) + 
  labs(x = element_blank(), y = "% of Cells") +
  scale_y_continuous(expand = expansion(mult = c(0.0, 0.05))) + coord_flip()

DEG plots

# feature plots for well charactergorized genes
feat.plots <- FeaturePlot(so, 
                          features = c("Pdcd1", "Klrg1", "Cxcr3", "Tcf7"),
                          # cols =  c("grey", brewer.pal(4, "Reds")[c(2:4)]),
                          cols =  viridis(n = 9, option = "magma"),
                          order = T,
                          ncol = 5, pt.size = 0.01) & plot.theme.umap & NoLegend()

# legend
feat.legend <- cowplot::get_legend(FeaturePlot(so, features ="Cx3cr1", cols =  viridis(n = 9, option = "magma")))

plot_grid(feat.plots, feat.legend, rel_widths = c(13,1))

# ggsave(filename = "2C - Feature Plots.png", path = outs, height = 2.7, width = 10)

top genes heatmap

# get top 5 DEG per cluser
top5 <- cluster.markers %>% group_by(cluster) %>% slice_max(order_by = avg_log2FC, n = 5, with_ties = F)

hm.df <- DotPlot(so, features = unique(top5$gene))$data %>% 
  select(c(avg.exp, features.plot, id)) %>% 
  pivot_wider(names_from = id, values_from = avg.exp) %>% 
  as.data.frame() %>%  `rownames<-`(.[,1]) %>% select(-features.plot) %>%
  t() %>% scale()

ha.clust <- rowAnnotation(`Cluster` = rownames(hm.df), 
                        col = list(`Cluster` = clusterpal),
                        show_legend = F)

ha.clust
## A HeatmapAnnotation object with 1 annotation
##   name: heatmap_annotation_0 
##   position: row 
##   items: 10 
##   width: 5mm 
##   height: 1npc 
##   this object is subsetable
##   14.4069666666667mm extension on the bottom 
## 
##     name annotation_type color_mapping width
##  Cluster discrete vector  user-defined   5mm
hm.top5 <- hm.df %>% 
  Heatmap(name = "Z-score", 
          right_annotation = ha.clust, 
          col = viridis(n = 9, option = "magma") ,
          # col = rev(brewer.pal(9, "RdYlBu")),
          width = unit(7, "in"), height = unit(2.2, "in"), 
          show_column_dend = F, show_row_names = T, row_dend_width = unit(0.15, "in"), 
          cluster_rows = T, cluster_columns = T, 
          border = T
        )

# pdf(file = paste0(outs, "/2C - HM Top5 DEG per cluster.pdf"), width = unit(12, "in"), height = unit(4, "in"))
hm.top5

# dev.off()

add common exh. module score from Daniel Et al Nat Immuno (https://www.nature.com/articles/s41590-022-01337-5)

# genes can be found in Supplemental tables of our manuscript
common.ex.genes <- read.csv("~/Resources/LCMV atlas tables/LCMV_common_ex_score.csv")$Common_Ex_Module
common.ex.genes <- common.ex.genes[common.ex.genes %in% vf.clean] %>% list()

DefaultAssay(so) <- "RNA"
so <- AddModuleScore(so, features = common.ex.genes, name = "CommonExMod", assay = "RNA")

VlnPlot(so, features = "CommonExMod1",  cols = clusterpal, pt.size = 0, sort = "decreasing") & plot.theme & rotate_x_text(45) & NoLegend()

FeaturePlot(so, features = "CommonExMod1")

common.ex.genes[[1]]
##  [1] "Gzmk"      "Gimap7"    "Adam19"    "Apol7e"    "Nedd9"     "Irf1"     
##  [7] "Cd38"      "Cish"      "Adgrg1"    "Serpina3g" "Fgl2"      "Fasl"     
## [13] "Cxcr6"     "AW112010"  "Arl6ip1"   "Gng2"      "Cd8a"      "Sh2d2a"   
## [19] "Tox"       "Tigit"     "Lag3"      "Pdcd1"
# heatmap of common Exh Genes
hm.df <- DotPlot(so, features = common.ex.genes[[1]])$data %>% 
  select(c(avg.exp, features.plot, id)) %>% 
  pivot_wider(names_from = id, values_from = avg.exp) %>% 
  as.data.frame() %>%  `rownames<-`(.[,1]) %>% select(-features.plot) %>%
  t() %>% scale()

ha.clust <- columnAnnotation(`Cluster` = rownames(hm.df), 
                        col = list(`Cluster` = clusterpal),
                        show_legend = F)

hm.df %>% t%>% 
  Heatmap(name = "Z-score", 
          top_annotation = ha.clust, 
          col = rev(brewer.pal(9, "RdYlBu")),
          width = unit(2, "in"), height = unit(4.7, "in"), 
          show_column_dend = F, show_row_names = T, row_dend_width = unit(0.15, "in"), 
          cluster_rows = T, cluster_columns = T, 
          border = T
        )

also add module scores for Tex-Term, Teff, Tmem from Daniel Et al Nat Immuno

# read in genes from module scores (Daniel et al Table S1)
## we only use the effector module in our analysis - see our table S7
genes.tables <- read_xls("~/Resources/LCMV atlas tables/Supplemental Table 1.xls") 

# take top 50 genes fgrom each 
term.genes <- genes.tables %>% filter(cluster == "Texterm") %>% slice_max(n = 50, order_by = avg_log2FC) %>% select(gene) %>% c() 
mem.genes <- genes.tables %>% filter(cluster == "Tmem") %>% slice_max(n = 50, order_by = avg_log2FC) %>% select(gene) %>% c() 
eff.genes <- genes.tables %>% filter(cluster == "Teff") %>% slice_max(n = 50, order_by = avg_log2FC) %>% select(gene) %>% c() 

# filter gby 
module.lists <- list(
  "Term_Module" = term.genes$gene[term.genes$gene %in% vf.clean],
  "Mem_Module" = mem.genes$gene[mem.genes$gene %in% vf.clean],
  "Teff_Module" = eff.genes$gene[eff.genes$gene %in% vf.clean]
)

so <- AddModuleScore(so, features = module.lists, name = names(module.lists), replace = T, assay = "RNA")

# plot out Moldule scores
VlnPlot(so, features = c("Term_Module1", "Mem_Module2", "Teff_Module3"),  cols = clusterpal, pt.size = 0, sort = "decreasing", ncol = 3) & 
  plot.theme & rotate_x_text(90) & NoLegend()

# ggsave(filename = "2B - ModuleScoreVlns.pdf", path = outs, height = 9, width = 3)

# print out mopdules
print("Tmem")
## [1] "Tmem"
module.lists$Mem_Module
##  [1] "Il7r"    "Sidt1"   "Gpr183"  "Gzmm"    "Emb"     "Gramd3"  "Atp1a1" 
##  [8] "Ier3"    "Cxcr3"   "Lcn4"    "Fam46a"  "Tecpr1"  "Sit1"    "Tob1"   
## [15] "Fam46c"  "Gm35037" "Rcn3"    "Cd44"    "Tmem154" "Wfikkn2" "Dennd4c"
## [22] "Dennd4a" "Faah"    "Clip1"   "Sntb2"
print("Teff")
## [1] "Teff"
module.lists$Teff_Module
##  [1] "Klrg1"   "Ccr2"    "Ly6c2"   "S100a10" "Itgax"   "Vim"     "Pycard" 
##  [8] "Anxa1"   "Tagln2"  "Epsti1"  "Jak1"    "Itgb7"   "Gna15"   "Wdr95"  
## [15] "Lsp1"    "Itga4"   "Cd8b1"   "Actb"    "Cd52"    "Ctla2b"  "Glipr2" 
## [22] "Tmsb4x"  "Coro1a"  "Gm2a"    "Hsd11b1" "Apobec2" "Fcgrt"
print("Tex")
## [1] "Tex"
module.lists$Term_Module
##  [1] "Cd7"           "Gzma"          "Cd160"         "Rgs1"         
##  [5] "Fos"           "Dusp1"         "Chn2"          "Adgrg1"       
##  [9] "Jun"           "Gzmk"          "Cxcr6"         "Lax1"         
## [13] "Cd244"         "Serpina3g"     "Actn2"         "Nedd9"        
## [17] "Fasl"          "Abcb9"         "Ppp1r15a"      "Junb"         
## [21] "AC163354.1"    "5830405F06Rik" "Gimap7"        "Cd200r1"      
## [25] "Cd38"          "Apol7e"        "Cish"          "Fgl2"         
## [29] "Pglyrp2"       "2900026A02Rik" "Adam19"        "Glrx"         
## [33] "Tcrg-C2"       "Vmp1"          "Mxd4"

after adding module scores, save RDS

# saveRDS(so, file = paste0(path, "/seurat_objects/7C_SeuratObject_ClusteredAndIDd.rds"))